
## Main workhorse regression runner.
## Regresses a standardized dependent variable (DV) on log(numPost + 1) plus
## either
##   (a) the 1890 covariates                          (covars == "pretreat"), or
##   (b) the 1890 covariates + contemporary controls  (any other covars value).
## Both specifications include state fixed effects via factor(state).
##
## Arguments:
##   varname  string; name of the DV column in `dataset`
##   covars   "pretreat" (default) selects (a); anything else selects (b)
##   dataset  data frame holding the DV and every covariate referenced below
##
## Returns: named numeric vector (Estimate, Std. Error) taken from the second
## row of the coefficient table, i.e. the log(numPost + 1) term.
##
## NOTE(review): earlier versions listed IncumbPartyVote_1884 twice. R drops
## duplicated formula terms, so IncumbPartyVote_1888 was never in the model.
## It is now included, matching reg_runner_resid -- confirm that this is the
## intended specification before comparing against previously saved output.
reg_runner_rev <- function(varname, covars = "pretreat", dataset) {
  if (!varname %in% names(dataset)) {
    stop("column '", varname, "' not found in dataset", call. = FALSE)
  }

  ## DVs entered as log(x + 1)
  log_plus_one <- c("AllOrgs", "Putnam", "Olson", "nccs2014", "Mortality",
                    "Premature mortality rate")
  ## DVs entered as log((x + 1) per 100,000 of Pop_2010)
  per_capita_log <- c("DRUGTOT", "MURDER", "RAPE", "GRNDTOT")

  dv <- dataset[[varname]]
  if (varname %in% log_plus_one) {
    dv <- log(dv + 1)
  } else if (varname %in% per_capita_log) {
    dv <- log(1e5 * (dv + 1) / dataset$Pop_2010)
  }

  ## The formulas are created here so that `dv` is found through the formula
  ## environment; every other variable comes from `dataset`.
  spec <- if (covars == "pretreat") {
    scale(dv) ~ log(numPost + 1) + log(tot_pop) + log(tot_pop / area) +
      sh_black_pop + sh_foreign_born_wh_1890 + log(rail_length + 1) +
      MajPartyVote_1884 + IncumbPartyVote_1884 +
      MajPartyVote_1888 + IncumbPartyVote_1888 +
      factor(state)
  } else {
    scale(dv) ~ log(numPost + 1) + log(tot_pop) + log(tot_pop / area) +
      sh_black_pop + sh_foreign_born_wh_1890 + log(rail_length + 1) +
      MajPartyVote_1884 + IncumbPartyVote_1884 +
      MajPartyVote_1888 + IncumbPartyVote_1888 +
      log(IncPC_2010) + I(`% non-Hispanic white`) +
      I(`Gini coefficient`) + I(`Poverty rate`) + I(`% adults with BA`) +
      factor(state)
  }

  fit <- lm(spec, data = dataset)
  summary(fit)$coefficients[2, 1:2]
}


## Same idea as the main regression runner, but uses a residualization
## technique: log(numPost + 1) is first regressed on population,
## rail/canal/river length and the 1884/1888 vote variables, and the
## residuals from that first stage (post_resid) replace log(numPost + 1) as
## the regressor of interest in the second stage.
##
## Arguments:
##   varname  string; name of the DV column in `dataset`
##   covars   "pretreat" (default) uses the 1890 covariates only; any other
##            value adds the contemporary controls
##   dataset  data frame holding the DV and every covariate referenced below
##
## Returns: named numeric vector (Estimate, Std. Error) taken from the second
## row of the second-stage coefficient table, i.e. the post_resid term.
reg_runner_resid <- function(varname, covars = "pretreat", dataset) {
  if (!varname %in% names(dataset)) {
    stop("column '", varname, "' not found in dataset", call. = FALSE)
  }

  ## First stage. na.exclude makes residuals() return one value per row of
  ## `dataset`, with NA for rows dropped because of missing data. This keeps
  ## the residuals aligned with the data by position rather than by parsing
  ## row names as indices, which is only valid for default 1..n row names and
  ## breaks on subsetted data.
  first_stage <- lm(
    log(numPost + 1) ~ log(tot_pop) + log(rail_length + 1) +
      log(canal_length + 1) + log(river_length + 1) +
      MajPartyVote_1884 + IncumbPartyVote_1884 +
      MajPartyVote_1888 + IncumbPartyVote_1888,
    data = dataset, na.action = na.exclude
  )
  dataset$post_resid <- unname(residuals(first_stage))

  ## DVs entered as log(x + 1)
  log_plus_one <- c("AllOrgs", "Putnam", "Olson", "nccs2014", "Mortality",
                    "Premature mortality rate")
  ## DVs entered as log((x + 1) per 100,000 of Pop_2010)
  per_capita_log <- c("DRUGTOT", "MURDER", "RAPE", "GRNDTOT")

  dv <- dataset[[varname]]
  if (varname %in% log_plus_one) {
    dv <- log(dv + 1)
  } else if (varname %in% per_capita_log) {
    dv <- log(1e5 * (dv + 1) / dataset$Pop_2010)
  }

  ## Second stage; `dv` is resolved through the formula environment.
  spec <- if (covars == "pretreat") {
    scale(dv) ~ post_resid + log(tot_pop) + log(tot_pop / area) +
      sh_black_pop + sh_foreign_born_wh_1890 + log(rail_length + 1) +
      MajPartyVote_1884 + IncumbPartyVote_1884 +
      MajPartyVote_1888 + IncumbPartyVote_1888 +
      factor(state)
  } else {
    scale(dv) ~ post_resid + log(tot_pop) + log(tot_pop / area) +
      sh_black_pop + sh_foreign_born_wh_1890 + log(rail_length + 1) +
      MajPartyVote_1884 + IncumbPartyVote_1884 +
      MajPartyVote_1888 + IncumbPartyVote_1888 +
      log(IncPC_2010) + I(`% non-Hispanic white`) +
      I(`Gini coefficient`) + I(`Poverty rate`) + I(`% adults with BA`) +
      factor(state)
  }

  fit <- lm(spec, data = dataset)
  summary(fit)$coefficients[2, 1:2]
}

## Version of the regression runner where newspapers are the main IV.
## Regresses a standardized DV on log(all_papers + 1), the 1890 covariates,
## the contemporary controls and state fixed effects.
##
## Arguments:
##   varname  string; name of the DV column in `dataset`
##   dataset  data frame holding the DV and every covariate referenced below
##
## Returns: named numeric vector (Estimate, Std. Error) taken from the second
## row of the coefficient table, i.e. the log(all_papers + 1) term.
##
## NOTE(review): earlier versions listed IncumbPartyVote_1884 twice. R drops
## duplicated formula terms, so IncumbPartyVote_1888 was never in the model.
## It is now included -- confirm that this is the intended specification.
reg_runner_news <- function(varname, dataset) {
  if (!varname %in% names(dataset)) {
    stop("column '", varname, "' not found in dataset", call. = FALSE)
  }

  outcome <- dataset[[varname]]
  if (varname %in% c("AllOrgs", "Putnam", "Olson", "nccs2014", "Mortality",
                     "Premature mortality rate")) {
    ## entered as log(x + 1)
    outcome <- log(outcome + 1)
  } else if (varname %in% c("DRUGTOT", "MURDER", "RAPE", "GRNDTOT")) {
    ## entered as log((x + 1) per 100,000 of Pop_2010)
    outcome <- log(1e5 * (outcome + 1) / dataset$Pop_2010)
  }

  ## `outcome` is resolved through the formula environment.
  fit <- lm(
    scale(outcome) ~ log(all_papers + 1) + log(tot_pop) +
      log(tot_pop / area) + sh_black_pop + sh_foreign_born_wh_1890 +
      log(rail_length + 1) +
      MajPartyVote_1884 + IncumbPartyVote_1884 +
      MajPartyVote_1888 + IncumbPartyVote_1888 +
      log(IncPC_2010) + I(`% non-Hispanic white`) +
      I(`Gini coefficient`) + I(`Poverty rate`) + I(`% adults with BA`) +
      factor(state),
    data = dataset
  )
  summary(fit)$coefficients[2, 1:2]
}

## Newspaper regression using the change in newspapers as the main IV:
## 1000 * (all_papers - all_papers1884) / Pop_2010. Controls are the 1890
## covariates, the contemporary controls and state fixed effects.
##
## Arguments:
##   varname  string; name of the DV column in `dataset`
##   dataset  data frame holding the DV and every covariate referenced below
##
## Returns: named numeric vector (Estimate, Std. Error) taken from the second
## row of the coefficient table, i.e. the newspaper-change term.
##
## NOTE(review): earlier versions listed IncumbPartyVote_1884 twice. R drops
## duplicated formula terms, so IncumbPartyVote_1888 was never in the model.
## It is now included -- confirm that this is the intended specification.
reg_runner_news_change <- function(varname, dataset) {
  if (!varname %in% names(dataset)) {
    stop("column '", varname, "' not found in dataset", call. = FALSE)
  }

  is_log_dv <- varname %in% c("AllOrgs", "Putnam", "Olson", "nccs2014",
                              "Mortality", "Premature mortality rate")
  is_rate_dv <- varname %in% c("DRUGTOT", "MURDER", "RAPE", "GRNDTOT")

  raw <- dataset[[varname]]
  dv <- if (is_log_dv) {
    log(raw + 1)
  } else if (is_rate_dv) {
    ## log of (count + 1) per 100,000 of Pop_2010
    log(1e5 * (raw + 1) / dataset$Pop_2010)
  } else {
    raw
  }

  ## `dv` is resolved through the formula environment.
  fit <- lm(
    scale(dv) ~ I(1000 * (all_papers - all_papers1884) / Pop_2010) +
      log(tot_pop) + log(tot_pop / area) +
      sh_black_pop + sh_foreign_born_wh_1890 + log(rail_length + 1) +
      MajPartyVote_1884 + IncumbPartyVote_1884 +
      MajPartyVote_1888 + IncumbPartyVote_1888 +
      log(IncPC_2010) + I(`% non-Hispanic white`) +
      I(`Gini coefficient`) + I(`Poverty rate`) + I(`% adults with BA`) +
      factor(state),
    data = dataset
  )
  summary(fit)$coefficients[2, 1:2]
}

## Newspaper regression in levels: the main IV is newspapers per 1,000 of
## Pop_2010 (1000 * all_papers / Pop_2010), with log(numPost + 1) entered as
## an additional control alongside the 1890 covariates, the contemporary
## controls and state fixed effects.
##
## Arguments:
##   varname  string; name of the DV column in `dataset`
##   dataset  data frame holding the DV and every covariate referenced below
##
## Returns: named numeric vector (Estimate, Std. Error) taken from the second
## row of the coefficient table, i.e. the newspapers-per-1,000 term.
##
## NOTE(review): earlier versions listed IncumbPartyVote_1884 twice. R drops
## duplicated formula terms, so IncumbPartyVote_1888 was never in the model.
## It is now included -- confirm that this is the intended specification.
## NOTE(review): unlike the other runners in this file, no per-capita log
## transform is applied to DRUGTOT/MURDER/RAPE/GRNDTOT here; that behaviour
## is kept as-is -- confirm it is deliberate.
reg_runner_news_rev_levels <- function(varname, dataset) {
  if (!varname %in% names(dataset)) {
    stop("column '", varname, "' not found in dataset", call. = FALSE)
  }

  ## DVs entered as log(x + 1); everything else enters untransformed
  log_plus_one <- c("AllOrgs", "Putnam", "Olson", "nccs2014", "Mortality",
                    "Premature mortality rate")
  dv <- dataset[[varname]]
  if (varname %in% log_plus_one) {
    dv <- log(dv + 1)
  }

  ## `dv` is resolved through the formula environment.
  fit <- lm(
    scale(dv) ~ I(1000 * (all_papers) / Pop_2010) + log(numPost + 1) +
      log(tot_pop) + log(tot_pop / area) +
      sh_black_pop + sh_foreign_born_wh_1890 + log(rail_length + 1) +
      MajPartyVote_1884 + IncumbPartyVote_1884 +
      MajPartyVote_1888 + IncumbPartyVote_1888 +
      log(IncPC_2010) + I(`% non-Hispanic white`) +
      I(`Gini coefficient`) + I(`Poverty rate`) + I(`% adults with BA`) +
      factor(state),
    data = dataset
  )
  summary(fit)$coefficients[2, 1:2]
}

## Newspaper regression using 1884 newspapers: the main IV is
## 1000 * all_papers1884 / tot_pop, with log(numPost + 1) entered as an
## additional control alongside the 1890 covariates, the contemporary
## controls and state fixed effects.
##
## Arguments:
##   varname  string; name of the DV column in `dataset`
##   dataset  data frame holding the DV and every covariate referenced below
##
## Returns: named numeric vector (Estimate, Std. Error) taken from the second
## row of the coefficient table, i.e. the 1884 newspapers term.
##
## NOTE(review): earlier versions listed IncumbPartyVote_1884 twice. R drops
## duplicated formula terms, so IncumbPartyVote_1888 was never in the model.
## It is now included -- confirm that this is the intended specification.
## NOTE(review): unlike the other runners in this file, no per-capita log
## transform is applied to DRUGTOT/MURDER/RAPE/GRNDTOT here; that behaviour
## is kept as-is -- confirm it is deliberate.
reg_runner_news_1884 <- function(varname, dataset) {
  if (!varname %in% names(dataset)) {
    stop("column '", varname, "' not found in dataset", call. = FALSE)
  }

  needs_log <- varname %in% c("AllOrgs", "Putnam", "Olson", "nccs2014",
                              "Mortality", "Premature mortality rate")
  ## log(x + 1) for the listed DVs; everything else enters untransformed
  dv <- if (needs_log) log(dataset[[varname]] + 1) else dataset[[varname]]

  ## `dv` is resolved through the formula environment.
  fit <- lm(
    scale(dv) ~ I(1000 * (all_papers1884) / tot_pop) + log(numPost + 1) +
      log(tot_pop) + log(tot_pop / area) +
      sh_black_pop + sh_foreign_born_wh_1890 + log(rail_length + 1) +
      MajPartyVote_1884 + IncumbPartyVote_1884 +
      MajPartyVote_1888 + IncumbPartyVote_1888 +
      log(IncPC_2010) + I(`% non-Hispanic white`) +
      I(`Gini coefficient`) + I(`Poverty rate`) + I(`% adults with BA`) +
      factor(state),
    data = dataset
  )
  summary(fit)$coefficients[2, 1:2]
}


## Version of the regression runner where we use previous decades.
## Regresses a standardized DV on log(numPost + 1) plus controls and state
## fixed effects.
##
## Arguments:
##   varname  string; name of the DV column in `dataset`
##   covars   "pretreat" (default): 1890 covariates including rail length and
##            the 1884/1888 vote variables. Any other value: population,
##            density, sh_black_pop, sh_foreign_born_wh_1890 and the
##            contemporary controls only.
##   dataset  data frame holding the DV and every covariate referenced below
##
## Returns: named numeric vector (Estimate, Std. Error) taken from the second
## row of the coefficient table, i.e. the log(numPost + 1) term.
##
## NOTE(review): earlier versions of the "pretreat" formula listed
## IncumbPartyVote_1884 twice. R drops duplicated formula terms, so
## IncumbPartyVote_1888 was never in the model. It is now included -- confirm
## that this is the intended specification.
## NOTE(review): the non-"pretreat" specification omits rail_length and the
## vote variables, unlike the other runners in this file; that behaviour is
## kept as-is -- confirm it is deliberate.
reg_runner_alt <- function(varname, covars = "pretreat", dataset) {
  if (!varname %in% names(dataset)) {
    stop("column '", varname, "' not found in dataset", call. = FALSE)
  }

  ## DVs entered as log(x + 1)
  log_plus_one <- c("AllOrgs", "Putnam", "Olson", "nccs2014", "Mortality",
                    "Premature mortality rate")
  ## DVs entered as log((x + 1) per 100,000 of Pop_2010)
  per_capita_log <- c("DRUGTOT", "MURDER", "RAPE", "GRNDTOT")

  dv <- dataset[[varname]]
  if (varname %in% log_plus_one) {
    dv <- log(dv + 1)
  } else if (varname %in% per_capita_log) {
    dv <- log(1e5 * (dv + 1) / dataset$Pop_2010)
  }

  ## `dv` is resolved through the formula environment.
  spec <- if (covars == "pretreat") {
    scale(dv) ~ log(numPost + 1) + log(tot_pop) + log(tot_pop / area) +
      sh_black_pop + sh_foreign_born_wh_1890 + log(rail_length + 1) +
      MajPartyVote_1884 + IncumbPartyVote_1884 +
      MajPartyVote_1888 + IncumbPartyVote_1888 +
      factor(state)
  } else {
    scale(dv) ~ log(numPost + 1) + log(tot_pop) + log(tot_pop / area) +
      sh_black_pop + sh_foreign_born_wh_1890 +
      log(IncPC_2010) + I(`% non-Hispanic white`) +
      I(`Gini coefficient`) + I(`Poverty rate`) + I(`% adults with BA`) +
      factor(state)
  }

  fit <- lm(spec, data = dataset)
  summary(fit)$coefficients[2, 1:2]
}